******************
**# A. IPUMS data
******************
* Build the perwt-weighted female share of employment for each ind1990 industry.
* Sample: year 1980, ages 22-65, empstat == 1. The share is the sum of perwt over
* observations with sex == 2 divided by the sum of perwt over all observations.
use "analysis/ipums_1970_1990", clear
keep if inrange(age, 22, 65) & empstat == 1 & year == 1980
gen women_perwt = perwt*(sex==2)
	* Name both weights explicitly: a *perwt wildcard would also sum any other
	* variable whose name happens to end in "perwt".
collapse (sum) perwt women_perwt, by(ind1990)
gen women_share = women_perwt/perwt
keep women_share ind1990

	* census90 is the 3-character, zero-padded string version of ind1990 and is the
	* key used to merge these shares onto the SIC concordance. Build it in a single
	* formatted conversion so ind1990 itself never leaves numeric form.
_strip_labels ind1990
gen census90 = string(ind1990, "%03.0f")
	* The later m:1 merge on census90 requires the key to be unique; fail here,
	* next to where the key is built, if it is not.
isid census90

label var women_share "Female share of 1980 ind1990 emp"
save "processing/women_shares", replace
  
  
**********************************
**# B. Concord SIC and Census ind 
**********************************
* Load the list of SIC codes from the 1979 cross-section of the SIC dataset.
	* Per the original note, this dataset only contains manufacturing SIC sectors.
use sic year if year == 1979 using "processing/SIC87_Final_Dataset.dta", clear
drop year
	* The join below is keyed on the string variable sic4.
tostring sic, generate(sic4)

* Attach census90 industry codes: form every sic4 pairing with the concordance
joinby sic4 using "raw/sic4_2_census90_v2.dta", unmatched(both)
	* Every SIC code in the dataset must appear in the concordance.
assert _merge != 1
	* Author's notes on the join result (counts are as recorded by the author):
	* _merge == 2 (in the concordance but not in the SIC dataset):
	*	63 unique sic4 / 13 unique census codes, versus 399 sic / 76 census
	*	codes with _merge == 3.
	*	Only 6 manuf codes (Census/SIC):
	*		232/2451 Trailers and semi-trailers for housing or camping
	*		322/3572 Computer storage devices and parts nspf
	*		351/3715 Truck trailers
	*		362/3764 Missile and rocket engines
	*		9100 Scrap & waste (SIC only)
	*		9200 Used/second hand (SIC only)
	*		9100 Special classification (SIC only)
	*		NOTE(review): 9100 is listed twice above - verify the last code.
	* _merge == 3: 399 unique sic codes across 400 obs, because
	*	census 272 and census 280 both map to sic 3341.
keep if _merge != 2
drop _merge

* Sanity check: the string key must convert back to the numeric sic, then drop it
assert !missing(sic4)
destring sic4, replace
assert sic4 == sic
drop sic4


************************************************
**# C. Concord Census IPUMS womens share to SIC
************************************************
* Attach the IPUMS female employment share to each SIC code through its census90
* industry code, then reduce the data to one row per SIC code.
merge m:1 census90 using "processing/women_shares"
	* Every census90 code carried by a SIC code must have a share in the IPUMS data.
assert _merge != 1
	* Variable names are written out in full (census90, _merge) so this section
	* also runs with -set varabbrev off-.
destring census90, replace
	* Write the unmatched census codes in the 100-399 range to the log. The original
	* used -browse-, which opens the interactive Data Editor and leaves no record in
	* the log of an unattended run.
list census90 women_share if inrange(census90, 100, 399) & _merge == 2, noobs
	* Author's notes on _merge == 2 (in the IPUMS shares but not in the industry
	* data); counts and codes are as recorded by the author:
	*		152 census inds, but only 6 manuf census inds
	*		NOTE(review): the original note also read "144 census codes not in
	*		ipums emp data, only 6 manuf codes", and only 5 codes are listed
	*		below - verify the counts against the -list- output above.
	*		122 Food industries, nes
	*		140 Dyeing and finishing textiles, except wool and knit goods
	*		301 Metal industries, nes
	*		332 Machinery, nes
	*		350 Electrical machinery, equipment, and supplies nes
keep if _merge == 3

* Aggregate as necessary
	* N is the number of census90 rows mapped to each sic; tabulated for the log.
bys sic: gen N = _N
tab N
	* A SIC code mapped to several census industries gets the unweighted mean of
	* their shares. Built-in -egen- plus -replace- stands in for the user-written
	* -ereplace-, so no extra package has to be installed.
tempvar sic_mean
bys sic: egen `sic_mean' = mean(women_share)
replace women_share = `sic_mean'
assert !missing(sic)
keep women_* sic
duplicates drop
	* After averaging, rows within a sic are identical, so exactly one row per
	* sic must remain.
isid sic

save "processing/sic_women_shares", replace
